#################################################################
## compute "panel-corrected" standard errors a la Beck and Katz
## simon jackman, dept of political science, stanford university
## june 2003
#################################################################

#################################################################
## function pcse
##
## inputs:
## lmobj: an object return by lm, e.g., foo <- lm(y ~ x1 + x2)
## group: unit id
##
## returns:  a k-by-k variance covariance matrix, where k is the
## number of predictors in lmobj (including the intercept)
## the "panel-corrected" standard errors are the square root of
## the diagonal of this matrix
#################################################################

pcse <- function(lmobj,group){
  e <- resid(lmobj)                                 ## ols residuals
  id <- unique(group)                               
  n <- length(id)
  J <- length(e)/n
  u <- matrix(NA,J,n)
  for(i in 1:n){
    u[,i] <- e[group==id[i]]
  }
  Sigma <- crossprod(u)/J                           ## MLE
#  cat("Cross-Unit Error Var-Covar Matrix (MLE)\n")
#  print(Sigma)

  X <- model.matrix(lmobj)                          ## X from lmobj
  k <- dim(X)[2]
  V1 <- matrix(0,k,k)
  V2 <- matrix(0,k,k)
  for(i in 1:n){                                    ## loop over units
    oki <- group==id[i]
    V1 <- V1 + crossprod(X[oki,])                   ## accumulate x-products
    for(j in 1:n){                                  ## loop again for x-correlations
      okj <- group==id[j]
      V2 <- V2 + (Sigma[i,j] * t(X[oki,])%*%X[okj,]) ## the "middle matrix"
    }
  }
  iV1 <- solve(V1)
  V <- iV1%*%V2%*%iV1
  V
}

debug.print=function(astring) {
print(astring)
flush.console()
}

pcse2 <- function(lmobj,group){
  e <- resid(lmobj)                                 ## ols residuals
  id <- unique(group)                               
  n <- length(id)
  J <- length(e)/n
  u <- matrix(NA,J,n)
#don't compute Sigma. For large numbers of panels Sigma
#is immense.
#  for(i in 1:n){
#    u[,i] <- e[group==id[i]]
#  }
#  Sigma <- crossprod(u)/J                           ## MLE
#  cat("Cross-Unit Error Var-Covar Matrix (MLE)\n")
#  print(Sigma)

#makes the assumptions the data contains the panels in order
#this avoids commands like oki=(group==id[i])
#which are very slow and memory intensive for large datasets

  indices=matrix(c(1:(J*n)),J,n)
debug.print("just finished indices")
  for (i in 1:n) {u[,i]=e[indices[,i]]}
debug.print("filled u matrix")

#panels are the columns of indices
#e.g. i^th panel is the indices in indices[,i]

  X <- model.matrix(lmobj)                          ## X from lmobj
  k <- dim(X)[2]
  V1 <- matrix(0,k,k)
  V2 <- matrix(0,k,k)
  for(i in 1:n){                                    ## loop over units
    debug.print(i)
    oki <- indices[,i]
    V1 <- V1 + crossprod(X[oki,])                   ## accumulate x-products
    for(j in 1:n){                                  ## loop again for x-correlations
      okj <- indices[,j]
      temp=mean(u[,i]*u[,j])*crossprod(X[oki,],X[okj,])
      V2 <- V2 + temp ## the "middle matrix"
    }
  }
  iV1 <- solve(V1)
  V <- iV1%*%V2%*%iV1
  V
}

debug.print=function(astring) {
print(astring)
flush.console()
}


#################################################################
## compute "clustered" standard errors a la Stata
## Ben Goodrich, Government, Harvard University
## April 2005
#################################################################

#################################################################
## function robust
##
## inputs:
## lmobj: an object return by lm, e.g., foo <- lm(y ~ x1 + x2)
## group: unit id
##
## returns:  a list with k-by-k variance covariance matrices, 
## where k is the number of predictors in lmobj
## the "robust" standard errors are the square root of
## the diagonal of this matrix
## There are options to do other kinds of SEs, email me questions
#################################################################

robust <- function(lmobj, group, Arellano = FALSE, Kiefer = FALSE, Stata = TRUE, White = FALSE) {
        out <- list() # This list will hold the results the function produces
        X <- model.matrix(lmobj) # The RHS of the model
        invXpX <- solve(crossprod(X)) # (X'X)^-1
        e <- residuals(lmobj) # Y - X%*%beta
        u.2 <- u.1 <- matrix(0, nrow = length(unique(group)), ncol = ncol(X)) # N x K zero matrix
        S.2 <- S.1 <- matrix(0, nrow = nrow(invXpX), ncol = ncol(invXpX))  # K x K zero matrix
        
        if (Arellano | Stata) {
          Xe <- X * e # This is NOT matrix multiplication; it multiplies each row of X by a different scalar (e_it)                  
          for(i in 1:length(unique(group))) { # Loop over units
            if(Arellano | Stata) {
              u.1[i,] <- colSums(as.matrix(Xe[group == unique(group)[i], ])) # Sum the columns of Xe for unit i
              S.1 <- S.1 + u.1[i,] %*% t(u.1[i,]) # Increment S.1 by (u.1_i u.1_i')
            }
          }
        }
        
        if (Kiefer) {
          S.2 <- 0
          for(i in 1:length(unique(group))) {
            u <- as.matrix(e[group == group[i]])
            S.2 <- S.2 + u %*% t(u)
          }
          S.2 <- S.2/length(e)
          V <- 0
          for(i in 1:length(unique(group))) {
            x <- as.matrix(X[group == unique(group)[i],])
            V <- V + t(x) %*% S.2 %*% x
          }
          out$VCV.Kiefer <- invXpX %*% V %*% invXpX
        }
        
        if (White) {
          S.3 <- 0
          for(i in 1:length(e)) {
            S.3 <- S.3 + e[i]^2 * (X[i,] %*% t(X[i,]))
          }
          S.3 <- S.3/length(e)
          out$VCV.White <- length(e) * (invXpX %*% S.3 %*% invXpX)
        }
        
        if(Arellano | Stata) {
          VCV.Arellano <- invXpX %*% S.1 %*% invXpX
          if(Stata) {
            out$VCV.Stata <- VCV.Arellano * ((nrow(X)-1)/(nrow(X) - ncol(X) - length(unique(group)))) * 
                         (length(unique(group))/(length(unique(group))-1))
          }
          if(Arellano) out$VCV.Arellano <- VCV.Arellano
        }
       out
  }
